/* Sort a file by lines.  Copyright (C) Richard M. Stallman 1984

   Permission is granted to anyone to make or distribute
   verbatim copies of this program
   provided that the copyright notice and this permission notice are preserved;
   and provided that the recipient is not asked to waive or limit his right to
   redistribute copies as permitted by this permission notice;
   and provided that anyone possessing a machine-executable copy
   is granted access to copy the source code, in machine-readable form,
   in some reasonable manner.

   Permission is granted to distribute derived works or enhanced versions of
   this program under the above conditions with the additional condition
   that the entire derivative or enhanced work
   must be covered by a permission notice identical to this one.

   Anything distributed as part of a package containing portions derived
   from this program, which cannot in current practice perform its function
   usefully in the absence of what was derived directly from this program,
   is to be considered as forming, together with the latter,
   a single work derived from this program,
   which must be entirely covered by a permission notice identical to this one
   in order for distribution of the package to be permitted.

 In other words, you are welcome to use, share and improve this program.
 You are forbidden to forbid anyone else to use, share and improve
 what you give them.   Help stamp out software-hoarding!  */

/*
To sort a small amount of data:

  read it all into core
  make a table of pointers to beginnings of lines
  sort the table
  write the lines in sequence.

To sort a large amount of data:

  split it up into temporary files short enough to sort in the above fashion
  sort each temporary file
  merge the temporary files together

To merge a few files:

  open them all, buffer one line of each file,
  and keep on outputting the line that is least.

To merge many files:

  divide them into groups of a few files, and merge each group into one file.
  merge the resulting files.
*/

#include <stdio.h>
#include <sys/file.h>
#include <sys/types.h>
#include <sys/stat.h>

/* When sorting in core, this structure describes one line
 and the position and length of its first keyfield.  */

struct lineinfo
  {
    char *text;		/* The actual text of the line */
    union
      {			/* The start of the key (for textual comparison) */
	char *text;
	long number;	/* or the numeric value (for numeric comparison) */
      } key;
    long keylen;	/* Length of key field */
  };

/* This structure describes a field to use as a sort key */

struct keyfield
  {
    int startwords;		/* # words to skip  */
    int startchars;		/*  and # additional chars to skip, to start of field */
    int endwords;		/* similar, from beg (or end) of line, to find end of field */
    int endchars;
    char ignore_blanks;		/* Ignore spaces and tabs within the field */
    char end_ignore_blanks;	/* Ignore spaces and tabs finding end of field */
    char ignore_punct;		/* Only letters, digits and blanks count */
    char ignore_nongraphic;	/* Ignore nonprinting chars (even ignore cr, lf, tab) */
    char fold_case;		/* Convert upper case to lower before comparing */
    char reverse;		/* Compare in reverse order */
    char numeric;		/* Parse text as an integer and compare the integers */
    char positional;		/* Sort according to position within the file */
  };

/* This keyfield is used if none are specified.
 Its flags are the global flags;
 if keyfields are specified, they inherit their flags
 from here if not overridden.  */

struct keyfield default_keyfield;

/* Pointer to vector of keyfields to use */

struct keyfield *keyfields;

/* Number of keyfields stored in that vector.  */

int num_keyfields;

/* Nonzero if the first keyfield is to be compared lexicographically
   without ignoring any characters or folding case.  */

int first_keyfield_normal;

/* Vector of input file names, terminated with a zero (null pointer) */

char **infiles;

/* Length of `infiles' */

int num_infiles;

/* Pointer to the array of pointers to lines being sorted */

char **linearray;

/* The allocated length of `linearray'. */

long lines;

/* Directory to use for temporary files */

char *tempdir;

/* Start of filename to use for temporary files.  It starts with a slash.  */

char *tempbase;

/* Number of last temporary file.  */

int tempcount;

/* Number of last temporary file already deleted.
 Temporary files are deleted by `flush_tempfiles' in order of creation.  */

int last_deleted_tempcount;

/* During in-core sort, this points to the base of the data block
 which contains all the lines of data.  */

char *text_base;

/* Additional command switches */

int delete_duplicates;	/* Nonzero means delete all but one of a set of matching lines */

int check;		/* Nonzero means verify that input files are already sorted. */

int merge;		/* Nonzero means assume each input file is already sorted.  */

char *outfile;		/* Nonzero means it is name to use for output file */

char *wordsep;		/* Nonzero means it is string that separates "words"
			  for purposes of scanning to find sort key */

int wordseplen;		/* Length of the string `wordsep' */

int keep_tempfiles;	/* Nonzero means do not delete tempfiles -- for debugging */

/* Forward declarations of functions in this file */

void decode_command ();
void parse_keyfield ();
void sort_in_core ();
void sort_offline ();
char **parsefile ();
char *find_field ();
char *find_pos ();
void writelines ();
int compare_full ();
int check_file ();
long readline ();
int merge_files ();
int merge_direct ();
char *concat ();
char *maketempname ();
void flush_tempfiles ();
char *tempcopy ();

extern char *mktemp ();

#define MAX_IN_CORE_SORT 500000

int
main (argc, argv)
     int argc;
     char **argv;
{
  int i;
  long total = 0;

  tempcount = 0;
  last_deleted_tempcount = 0;

  decode_command (argc, argv);

  tempbase = mktemp (concat ("/stmXXXXXX", "", ""));

  /* If an input file would be clobbered by our output, copy it to a tempfile first */

  if (outfile)
    for (i = 0; i < num_infiles; i++)
      if (!strcmp (infiles[i], outfile))
	{
	  int desc = open (infiles[i], 0, 0);
	  if (desc < 0)
	    pfatal_with_name (infiles[i]);
	  infiles[i] = tempcopy (desc);
	  close (desc);
	}

  /* Handle merging files already sorted */

  if (merge)
    {
      int value = merge_files (infiles, num_infiles, outfile);
      flush_tempfiles (tempcount);	/* Delete all temporary files still left */
      exit (value);
    }

  /* Handle checking that input files are already sorted */

  if (check)
    {	/* Just verify that each input file is itself sorted.  */
      int lossage = 0;
      char **ip;

      for (ip = infiles; *ip; ip++)
	lossage |= check_file (*ip);

      exit (lossage);
    }

  /* Here only for sorting.  */

  /* If standard input is one of our inputs, copy it to a temp file now */

  for (i = 0; i < num_infiles; i++)
    if (!strcmp (infiles[i], "-"))
      infiles[i] = tempcopy (fileno (stdin));

  /* Compute total size of files to be sorted */
  /* If any file is not random access, copy it to a temp file now */

  for (i = 0; i < num_infiles; i++)
    {
      int desc;
      long ptr;
      struct stat st;

    retry:
      if (stat (infiles[i], &st) < 0)
	pfatal_with_name (infiles[i]);
      if ((st.st_mode & S_IFMT) != S_IFREG)
	{
	  desc = open (infiles[i], 0, 0);
	  infiles[i] = tempcopy (desc);
	  close (desc);
	  goto retry;
	}
      total += st.st_size;
    }

  if (total < MAX_IN_CORE_SORT)
    /* Sort a small amount of data */
    sort_in_core (infiles, num_infiles, total, outfile);
  else
    sort_offline (infiles, num_infiles, total, outfile);

  flush_tempfiles (tempcount);
}

/* This page decodes the command line arguments to set the parameter variables
 and set up the vector of keyfields and the vector of input files */

void
decode_command (argc, argv)
     int argc;
     char **argv;
{
  int i;
  struct keyfield *kp;
  char **ip;

  /* Store default values into parameter variables */

  tempdir = "/tmp";
  delete_duplicates = 0;
  check = 0;
  merge = 0;
  outfile = 0;
  wordsep = 0;
  wordseplen = 0;
  keep_tempfiles = 0;

  bzero (&default_keyfield, sizeof (struct keyfield));
  default_keyfield.endwords = -1;	/* Field specified is the entire line */
  default_keyfield.endchars = -1;

  /* Allocate space for argc keyfields, as that cannot be too few.  Same for input files.  */

  keyfields = (struct keyfield *) xmalloc (argc * sizeof (struct keyfield));
  bzero (keyfields, argc * sizeof (struct keyfield));
  infiles = (char **) xmalloc (argc * sizeof (char *));

  /* First find all switches that control the default kind-of-sort */

  for (i = 1; i < argc; i++)
    {
      int tem = classify_arg (argv[i]);
      char c;
      char *p;

      if (tem <= 0) continue;
      if (tem > 1)
	{
	  if (i + 1 == argc)
	    fatal ("switch %s given with no argument following it", argv[i]);
	  if (!strcmp (argv[i], "-o"))
	    outfile = argv[i + 1];
	  else if (!strcmp (argv[i], "-T"))
	    tempdir = argv[i + 1];
	  else if (!strcmp (argv[i], "-t"))
	    {
	      wordsep = argv[i + 1];
	      wordseplen = strlen (wordsep);
	    }
	  i += tem - 1;
	  continue;
	}

      p = &argv[i][1];
      while (c = *p++)
	switch (c)
	  {
	  case 'b':
	    default_keyfield.ignore_blanks = 1;
	    default_keyfield.end_ignore_blanks = 1;
	    break;

	  case 'c':
	    check = 1;
	    break;

	  case 'd':
	    default_keyfield.ignore_punct = 1;
	    break;

	  case 'f':
	    default_keyfield.fold_case = 1;
	    break;

	  case 'k':
	    keep_tempfiles = 1;
	    break;

	  case 'i':
	    default_keyfield.ignore_nongraphic = 1;
	    break;

	  case 'm':
	    merge = 1;
	    break;

	  case 'n':
	    default_keyfield.numeric = 1;
	    default_keyfield.ignore_blanks = 1;
	    break;

	  case 'p':
	    default_keyfield.positional = 1;
	    break;

	  case 'r':
	    default_keyfield.reverse = 1;
	    break;

	  case 't':
	    wordsep = p;
	    wordseplen = strlen (p);
	    goto switchdone;

	  case 'u':
	    delete_duplicates = 1;
	    break;

	  default:
	    fatal ("invalid command switch %c", c);
	  }
    switchdone: ;
    }

  /* Then process the specifications of any keyfields, using those defaults */	
  /* Also separate out the input file names */

  kp = keyfields;
  ip = infiles;
  for (i = 1; i < argc; i++)
    {
      int tem = classify_arg (argv[i]);
      if (tem > 0)
	i += tem - 1;
      if (tem == 0)
	*ip++ = argv[i];
      if (tem < 0 && argv[i][0] == '+')
        parse_keyfield (kp++, argv[i], argv[i + 1]);
    }

  if (kp == keyfields)
    /* Use the default keyfield if none are specified */
    bcopy (&default_keyfield, kp++, sizeof (struct keyfield));

  /* Add one more keyfield that compares all characters, ignoring nothing.  */
  /* This is the last-resort means of comparison */
  kp->startwords = 0;
  kp->startchars = 0;
  kp->endwords = -1;
  kp->endchars = -1;
  kp++;

  /* Record number of keyfields, terminate list of filenames */

  num_keyfields = kp - keyfields;
  num_infiles = ip - infiles;
  if(num_infiles == 0) {/* JF if no input, read stdin */
    *ip++="-";
    num_infiles++;
  }
  *ip = 0;

  first_keyfield_normal
    = !keyfields[0].ignore_punct && !keyfields[0].ignore_nongraphic
      && !keyfields[0].fold_case && !keyfields[0].numeric
      && !keyfields[0].positional;
}

/* Parse the one or two arguments that specify a single keyfield */
/* The argument `keyfield' points to the structure in which to store
 the results of parsing them.
 We know, when this function is called, that `fromarg' is a string
 starting with "+" that says where the field starts.
 If `toarg' starts with "-", it describes where the field ends.
 Otherwise, ignore it.  */

void
parse_keyfield (keyfield, fromarg, toarg)
     register struct keyfield *keyfield;
     register char *fromarg;
     register char *toarg;
{
  char *p = fromarg + 1;

  keyfield->startwords = parse_int (&p);
  if (*p == '.')
    {
      p++;
      keyfield->startchars = parse_int (&p);
    }
  else keyfield->startchars = 0;

  if (*p == 0)
    {
      /* If fromarg contains no flag letters, use the defaults */

      keyfield->ignore_blanks = default_keyfield.ignore_blanks;
      keyfield->ignore_punct = default_keyfield.ignore_punct;
      keyfield->ignore_nongraphic = default_keyfield.ignore_nongraphic;
      keyfield->fold_case = default_keyfield.fold_case;
      keyfield->numeric = default_keyfield.numeric;
      keyfield->reverse = default_keyfield.reverse;
    }
  else  /* Parse the flag letters and ignore the default flags */
    while (*p)
      switch (*p++)
	{
	case 'a':	/* `a' as a flag means "ignore the defaults, and set no flags" */
	  break;

	case 'b':
	  keyfield->ignore_blanks = 1;
	  break;

	case 'd':
	  keyfield->ignore_punct = 1;
	  break;

	case 'f':
	  keyfield->fold_case = 1;
	  break;

	case 'i':
	  keyfield->ignore_nongraphic;
	  break;

	case 'n':
	  keyfield->numeric = 1;
	  keyfield->ignore_blanks = 1;
	  break;

	case 'p':
	  keyfield->positional = 1;
	  break;

	case 'r':
	  keyfield->reverse = 1;
	  break;

	default:
	  error ("invalid key specifier %s", fromarg);
	  goto endfrom;
	}

 endfrom:

  if (toarg[0] != '-')
    {
      /* If the following arg does not start with minus, it is not really
	 associated with this keyfield, so we have no end specified.  Default
	 to the end of the line.  */
      keyfield->endwords = -1;
      keyfield->endchars = -1;
      return;
    }

  p = toarg + 1;

  keyfield->endwords = parse_int (&p);
  if (*p == '.')
    {
      p++;
      keyfield->endchars = parse_int (&p);
    }
  else keyfield->endchars = 0;

  if (*p == 'b')
    {
      keyfield->end_ignore_blanks = 1;
      p++;
    }

  if (*p)
    error ("invalid key specifier %s", fromarg);
}

/* Return 0 for an argument that is not a switch;
 for a switch, return 1 plus the number of following arguments that the switch swallows.
 Return -1 for a switch that is part of a keyfield specification */

int
classify_arg (arg)
     register char *arg;
{
  if (!strcmp (arg, "-T") || !strcmp (arg, "-o") || !strcmp (arg, "-t"))
    return 2;
  if (arg[0] == '-' && arg[1] && (arg[1] < '0' || arg[1] > '9'))
    return 1;
  if (arg[1] == 0)	/* Just - is not a switch; it means read stdin.  */
    return 0;
  if (arg[0] == '-' || arg[0] == '+')
    return -1;
  return 0;
}

/* Create a name for a temporary file */

char *
maketempname (count)
     int count;
{
  char tempsuffix[10];
  sprintf (tempsuffix, "%d", count);
  return concat (tempdir, tempbase, tempsuffix);
}

/* Delete all temporary files up to the specified count */

void
flush_tempfiles (to_count)
     int to_count;
{
  if (keep_tempfiles) return;
  while (last_deleted_tempcount < to_count)
    unlink (maketempname (++last_deleted_tempcount));
}

/* Copy an input file into a temporary file, and return the temporary file name */

#define BUFSIZE 1024

char *
tempcopy (idesc)
     register int idesc;
{
  char *outfile = maketempname (++tempcount);
  register int odesc;
  char buffer[BUFSIZE];

  odesc = open (outfile, O_WRONLY | O_CREAT, 0666);

  if (odesc < 0) pfatal_with_name (outfile);

  while (1)
    {
      register int nread = read (idesc, buffer, BUFSIZE);
      write (odesc, buffer, nread);
      if (!nread) break;
    }

  close (odesc);

  return outfile;
}

/* Compare two lines, provided as pointers to pointers to text,
 according to the specified set of keyfields */

int
compare_full (line1, line2)
     register char **line1, **line2;
{
  register int i;

  /* Compare using the first keyfield;
     if that does not distinguish the lines, try the second keyfield; and so on. */

  for (i = 0; i < num_keyfields; i++)
    {
      long length1, length2;
      char *start1 = find_field (&keyfields[i], *line1, &length1);
      char *start2 = find_field (&keyfields[i], *line2, &length2);
      register int tem = compare_field (&keyfields[i], start1, length1, *line1 - text_base,
					      start2, length2, *line2 - text_base);
      if (tem)
	{
	  if (keyfields[i].reverse)
	    return - tem;
          return tem;
	}
    }

  return 0;    /* Lines match exactly */
}

/* Compare two entire lines lexicographically,
   with calling conventions for use with qsort.  */

int
compare_simple (line1, line2)
     char **line1, **line2;
{
  register char *p1 = *line1, *p2 = *line2;
  register int c, diff;

  while (1)
    {
      c = *p1++;
      diff = c - *p2++;
      if (diff)
	return diff;
      if (!c)
	return 0;
    }
}

/* Compare two lines described by structures
 in which the first keyfield is identified in advance.
 For positional sorting, assumes that the order of the lines in core
 reflects their nominal order.  */

int
compare_prepared (line1, line2)
     struct lineinfo *line1, *line2;
{
  register int i;
  register int tem;
  char *text1, *text2;

  if (first_keyfield_normal)
    {
      register char *p1 = line1->key.text;
      register char *p2 = line2->key.text;
      i = line1->keylen;
      if (i > line2->keylen)
	i = line2->keylen;
      
      while (i-- > 0)
	{
	  if (tem = (*p1++ - *p2++))
	    return (keyfields->reverse ? - tem : tem);
	}

      tem = line1->keylen - line2->keylen;
    }
  /* Compare using the first keyfield, which has been found for us already */
  else if (keyfields->positional)
    {
      if (line1->text - text_base > line2->text - text_base)
	tem = 1;
      else
	tem = -1;
    }
  else if (keyfields->numeric)
    tem = line1->key.number - line2->key.number;
  else
    tem = compare_field (keyfields, line1->key.text, line1->keylen, 0, line2->key, line2->keylen, 0);
  if (tem)
    {
      if (keyfields->reverse)
	return - tem;
      return tem;
    }

  text1 = line1->text;
  text2 = line2->text;

  /* Compare using the second keyfield;
     if that does not distinguish the lines, try the third keyfield; and so on. */

  for (i = 1; i < num_keyfields; i++)
    {
      long length1, length2;
      char *start1 = find_field (&keyfields[i], text1, &length1);
      char *start2 = find_field (&keyfields[i], text2, &length2);
      int tem = compare_field (&keyfields[i], start1, length1, text1 - text_base,
					      start2, length2, text2 - text_base);
      if (tem)
	{
	  if (keyfields[i].reverse)
	    return - tem;
          return tem;
	}
    }

  return 0;    /* Lines match exactly */
}

/* Like compare_full but more general.
 You can pass any strings, and you can say how many keyfields to use.
 `pos1' and `pos2' should indicate the nominal positional ordering of
 the two lines in the input.  */

int
compare_general (str1, str2, pos1, pos2, use_keyfields)
     char *str1, *str2;
     long pos1, pos2;
     int use_keyfields;
{
  register int i;

  /* Compare using the first keyfield;
     if that does not distinguish the lines, try the second keyfield; and so on. */

  for (i = 0; i < use_keyfields; i++)
    {
      long length1, length2;
      register char *start1 = find_field (&keyfields[i], str1, &length1);
      register char *start2 = find_field (&keyfields[i], str2, &length2);
      register int tem = compare_field (&keyfields[i], start1, length1, pos1, start2, length2, pos2);
      if (tem)
	{
	  if (keyfields[i].reverse)
	    return - tem;
          return tem;
	}
    }

  return 0;    /* Lines match exactly */
}

/* Find the start and length of a field in `str' according to `keyfield'.
 A pointer to the starting character is returned, and the length
 is stored into the int that `lengthptr' points to.  */

char *
find_field (keyfield, str, lengthptr)
     register struct keyfield *keyfield;
     register char *str;
     long *lengthptr;
{
  register char *start = find_pos (str, keyfield->startwords,
				   keyfield->startchars,
				   keyfield->ignore_blanks);
  register char *end;
  if (keyfield->endwords < 0)
    {
      end = start;
      while (*end && *end != '\n') end++;
    }
  else
    {
      end = find_pos (str, keyfield->endwords, keyfield->endchars,
       		      keyfield->end_ignore_blanks);
      if (end - str < start - str) end = start;
    }
  *lengthptr = end - start;
  return start;
}

/* Find a pointer to a specified place within `str',
 skipping (from the beginning) `words' words and then `chars' chars.
 If `ignore_blanks' is nonzero, we skip all blanks
 after finding the specified word.  */

char *
find_pos (str, words, chars, ignore_blanks)
     char *str;
     int words, chars;
     int ignore_blanks;
{
  register int i;
  register char *p = str;

  for (i = 0; i < words; i++)
    {
      if (wordseplen)
	{
	  /* -t specified: search for next occurrence of separator string and skip it. */
	  register char *p1 = p;
	  register char c = wordsep[0];
	  register char c1;

          while ((c1 = *p1) && c1 != '\n'
		 && (c1 != c || strncmp (p1, wordsep, wordseplen)))
	    p1++;
	  if (*p1)
	    p = p1 + wordseplen;
	  else
	    return p1;
	}
      else
	{
	  register char c;
	  /* No -t; find next bunch of nonblanks and skip them. */
	  while ((c = *p) == ' ' || c == '\t') p++;
	  while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t')) p++;
	}
      if (!*p || *p == '\n') return p;
    }

  if (ignore_blanks)
    while (*p == ' ' || *p == '\t') p++;

  for (i = 0; i < chars; i++)
    {
      if (!*p  || *p == '\n') break;
      p++;
    }
  return p;
}

/* Compare two fields (each specified as a start pointer and a character count)
 according to `keyfield'.  The sign of the value reports the relation between the fields */

int
compare_field (keyfield, start1, length1, pos1, start2, length2, pos2)
     struct keyfield *keyfield;
     char *start1;
     long length1;
     long pos1;
     char *start2;
     long length2;
     long pos2;
{
  if (keyfields->positional)
    {
      if (pos1 > pos2)
	return 1;
      else
	return -1;
    }
  if (keyfield->numeric)
    {
      register long value = atol (start1) - atol (start2);
      if (value > 0) return 1;
      if (value < 0) return -1;
      return 0;
    }
  else
    {
      register char *p1 = start1;
      register char *p2 = start2;
      char *e1 = start1 + length1;
      register char *e2 = start2 + length2;

      int ignore_punct = keyfield->ignore_punct;
      int ignore_nongraphic = keyfield->ignore_nongraphic;
      int fold_case = keyfield->fold_case;

      while (1)
	{
	  register char c1, c2;

	  if (p1 == e1) c1 = 0;
	  else c1 = *p1++;
	  if (p2 == e2) c2 = 0;
	  else c2 = *p2++;

	  /* Find the next significant character in field 1 */

	  while ((ignore_nongraphic && c1 && (c1 < 040 || c1 == 0177))
		 || (ignore_punct && c1
				  && !(c1 & ~040 >= 'A' && c1 & ~040 <= 'Z')
				  && !(c1 >= '0' && c1 <= '9')
				  && c1 != ' ' && c1 != '\t'))
	    {
	      if (p1 == e1) c1 = 0;
	      else c1 = *p1++;
	    }

	  /* Find the next significant character in field 2 */

	  while ((ignore_nongraphic && c2 && (c2 < 040 || c2 == 0177))
		 || (ignore_punct && c2
				  && !(c2 & ~040 >= 'A' && c2 & ~040 <= 'Z')
				  && !(c2 >= '0' && c2 <= '9')
				  && c2 != ' ' && c2 != '\t'))
	    {
	      if (p2 == e2) c2 = 0;
	      else c2 = *p2++;
	    }

	  /* Ignore case of both if desired */

	  if (fold_case)
	    {
	      if (c1 >= 'A' && c1 <= 'Z') c1 = c1 + 040;
	      if (c2 >= 'A' && c2 <= 'Z') c2 = c2 + 040;
	    }

	  /* Actually compare */

	  if (c1 != c2) return c1 - c2;
	  if (!c1) break;
	}
      return 0;
    }
}

/* A `struct linebuffer' is a structure which holds a line of text.
 `readline' reads a line from a stream into a linebuffer
 and works regardless of the length of the line.  */

struct linebuffer
  {
    long size;
    char *buffer;
  };

/* Initialize a linebuffer for use */

void
initbuffer (linebuffer)
     register struct linebuffer *linebuffer;
{
  linebuffer->size = 200;
  linebuffer->buffer = (char *) xmalloc (200);
}

/* Read a line of text from `stream' into `linebuffer'.
 Return the length of the line.  */

long
readline (linebuffer, stream)
     register struct linebuffer *linebuffer;
     FILE *stream;
{
  register char *buffer = linebuffer->buffer;
  register char *p = linebuffer->buffer;
  register char *end = p + linebuffer->size;

  while (1)
    {
      register int c = getc (stream);
      if (p == end)
	{
	  buffer = (char *) xrealloc (buffer, linebuffer->size *= 2);
	  p += buffer - linebuffer->buffer;
	  end += buffer - linebuffer->buffer;
	  linebuffer->buffer = buffer;
	}
      if (c < 0 || c == '\n')
	{
	  *p = 0;
	  break;
	}
      *p++ = c;
    }

  return p - buffer;
}

/* Sort the input files together when they are too big to sort in core */

void
sort_offline (infiles, nfiles, total, outfile)
     char **infiles;
     int nfiles;
     long total;
     char *outfile;
{
  int ntemps = 2 * (total + MAX_IN_CORE_SORT - 1) / MAX_IN_CORE_SORT;  /* More than enough */
  char **tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
  int infilenum = 0;
  FILE *istream = fopen (infiles[infilenum], "r");
  int i;
  struct linebuffer lb;
  long linelength;

  initbuffer (&lb);

  /* Read in one line of input data.  */

  while (infilenum < nfiles)
    {
      linelength = readline (&lb, istream);
      if (linelength) break;
      if (feof (istream))
	{
	  fclose (istream);
	  if (infilenum < nfiles)
	    {
	      istream = fopen (infiles[infilenum], "r");
	      if (istream == 0)
		perror_with_name (infiles[infilenum]);
	    }
	}
    }

  /* Split up the input into `ntemps' temporary files, or maybe fewer,
    and put the new files' names into `tempfiles' */

  for (i = 0; i < ntemps && infilenum < nfiles; i++)
    {
      char *outname = maketempname (++tempcount);
      FILE *ostream = fopen (outname, "w");
      long tempsize = 0;

      if (!ostream) pfatal_with_name (outname);
      tempfiles[i] = outname;

      /* Copy lines into this temp file as long as it does not make file "too big"
	or until there are no more lines.  */

      while (infilenum < nfiles && tempsize + linelength + 1 <= MAX_IN_CORE_SORT)
	{
	  tempsize += linelength + 1;
	  fputs (lb.buffer, ostream);
	  putc ('\n', ostream);

	  /* Read another line of input data.  Move to next file whenever necessary.  */

	  while (infilenum < nfiles)
	    {
	      linelength = readline (&lb, istream);
	      if (linelength) break;
	      if (feof (istream))
		{
		  fclose (istream);
		  if (++infilenum < nfiles)
		    {
		      istream = fopen (infiles[infilenum], "r");
		      if (istream == 0)
			perror_with_name (infiles[infilenum]);
		    }
		}
	      else break;
	    }
	}
      fclose (ostream);
    }

  free (lb.buffer);

  /* Record number of temp files we actually needed.  */

  ntemps = i;

  /* Sort each tempfile into another tempfile.
    Delete the first set of tempfiles and put the names of the second into `tempfiles' */

  for (i = 0; i < ntemps; i++)
    {
      char *newtemp = maketempname (++tempcount);
      sort_in_core (&tempfiles[i], 1, MAX_IN_CORE_SORT, newtemp);
      if (!keep_tempfiles)
        unlink (tempfiles[i]);
      tempfiles[i] = newtemp;
    }

  /* Merge the tempfiles together */

  merge_files (tempfiles, ntemps, outfile);
}

/* Sort `nfiles' files (whose names are in `files')
 and whose total size is `total',
 assuming that is small enough to be done in-core,
 and send the output to `outfile' (or to stdout).  */

void
sort_in_core (files, nfiles, total, outfile)
     char **files;
     int nfiles;
     long total;
     char *outfile;
{
  char **nextline;
  char *data = (char *) xmalloc (total + nfiles);
  char *data_left = data;
  char **file_data = (char **) xmalloc (nfiles * sizeof (char *));
  long *file_size = (long *) xmalloc (nfiles * sizeof (long));
  int i;
  FILE *ostream = stdout;
  struct lineinfo *lineinfo;

  /* Read the contents of all the files into the moby array `data', with nulls between */

  for (i = 0; i < nfiles; i++)
    {
      int desc = open (files[i], 0, 0);
      long size;

      if (desc < 0)
	fatal ("failure reopening %s", files[i]);
      lseek (desc, 0L, L_XTND);
      size = tell (desc);
      lseek (desc, 0L, 0);
      if (size != read (desc, data_left, size))
	fatal ("wrong amount of data read from %s", files[i]);
      file_data[i] = data_left;
      file_size[i] = size;
      data_left += size;
      *data_left++ = 0;

      close (desc);
    }

  /* Sort routines want to know this address */

  text_base = data;

  /* Create the array of pointers to lines, with a default size frequently enough.  */

  lines = total / 50;
  linearray = (char **) xmalloc (lines * sizeof (char *));

  /* `nextline' points to the next free slot in this array.
     `lines' is the allocated size.  */

  nextline = linearray;

  /* Read in the input files' data, and make entries for the lines in them.  */

  for (i = 0; i < nfiles; i++)
    nextline = parsefile (files[i], nextline, file_data[i], file_size[i]);

  /* Sort the lines */

  /* If we are sorting by the full text of the line,
     use special superfast code for this case,
     since the Unix sort does and we want this program
     always to be faster than it.  */

  if (first_keyfield_normal
      && keyfields[0].startwords == 0
      && keyfields[0].startchars == 0
      && keyfields[0].endwords == -1
      && keyfields[0].endchars == -1
      && !keyfields[0].reverse && !keyfields[0].ignore_blanks)
    qsort (linearray, nextline - linearray, sizeof (char *), compare_simple);
  else
    {
      /* If we have enough space, find the first keyfield of each line in advance.
	Make a `struct lineinfo' for each line, which records the keyfield
	as well as the line, and sort them.  */

      lineinfo = (struct lineinfo *) malloc ((nextline - linearray) * sizeof (struct lineinfo));

      if (lineinfo)
	{
	  register struct lineinfo *lp;
	  register char **p;

	  for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
	    {
	      lp->text = *p;
	      lp->key.text = find_field (keyfields, *p, &lp->keylen);
	      if (keyfields->numeric)
		lp->key.number = atol (lp->key.text);
	    }

	  qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo), compare_prepared);

	  for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
	    *p = lp->text;
	}
      else
	qsort (linearray, nextline - linearray, sizeof (char *), compare_full);
    }
  /* Open the output file */

  if (outfile)
    {
      ostream = fopen (outfile, "w");
      if (!ostream)
	pfatal_with_name (outfile);
    }

  writelines (linearray, nextline - linearray, ostream);
  if (outfile) fclose (ostream);

  free (linearray);
  free (file_size);
  free (file_data);
  free (data);
}

/* Parse an input string in core into lines.
 `data' is the input string, and `size' is its length.
 Data goes in `linearray' starting at `nextline'.
 The value returned is the first entry in `linearray' still unused.

 Each newline is replaced with a null, so that the lines that are
 parsed are also null-terminated strings.  */

char **
parsefile (filename, nextline, data, size)
     char *filename;
     char **nextline;
     char *data;
     long size;
{
  register char *p, *end;
  register char **line = nextline;

  p = data;
  end = p + size;
  *end = '\n';

  while (p != end)
    {
      *line = p;
      while (*p++ != '\n');
      *--p = 0;
      if (p != end) p++;

  /* This feature will be installed later.  */
  /*      if (discard_empty_lines && p == *line + 1) continue;  */

      line++;
      if (line == linearray + lines)
	{
	  char **old = linearray;
	  linearray = (char **) xrealloc (linearray, sizeof (char *) * (lines *= 4));
	  line += linearray - old;
	}
    }

  return line;
}

/* Copy the lines in the sorted order.
 Each line is copied out of the input file it was found in. */

void
writelines (linearray, nlines, ostream)
     char **linearray;
     int nlines;
     register FILE *ostream;
{
  register char **stop_line = linearray + nlines;
  register char **next_line;

  /* Output the text of the lines, and free the buffer space */

  for (next_line = linearray; next_line != stop_line; next_line++)
    {
      /* If -u was specified, output the line only if distinct from previous one.  */
      if (!delete_duplicates || next_line == linearray
	  /* Compare previous line with this one, using only the explicitly specd keyfields */
	  || compare_general (*(next_line - 1), *next_line, 0L, 0L, num_keyfields - 1))
	{
	  fputs (*next_line, ostream);
          putc ('\n', ostream);
	}
    }
}

/* Check that a file's lines are already sorted according to the specified keys */
/* Print one message only if the file is not sorted.
 Also return nonzero if the file is not sorted.  */

int
check_file (filename)
     char *filename;
{
  register FILE *istream = fopen (filename, "r");
  struct linebuffer lb1, lb2;
  register struct linebuffer *thisline, *prevline, *exch;
  register int failure = 0;
  int comp;
  int simple_compare =
    (first_keyfield_normal
     && keyfields[0].startwords == 0
     && keyfields[0].startchars == 0
     && keyfields[0].endwords == -1
     && keyfields[0].endchars == -1
     && !keyfields[0].reverse && !keyfields[0].ignore_blanks);
  struct lineinfo prevkey, thiskey;

  if (!istream)
    {
      perror_with_name (filename);
      return 0;
    }

  thisline = &lb1;
  prevline = &lb2;

  initbuffer (thisline);
  initbuffer (prevline);

  readline (prevline, istream);
  thiskey.text = thisline->buffer;
  thiskey.key.text = find_field (keyfields, thisline->buffer, &thiskey.keylen);
  if (keyfields->numeric)
    thiskey.key.number = atol (thiskey.key.text);

  while (!feof (istream))
    {
      readline (thisline, istream);
      if (!thisline->buffer[0] && feof (istream)) break;
      if (simple_compare)
	comp = strcmp (prevline->buffer, thisline->buffer);
      else
	{
	  /* Save info on 1st field of prev line */
	  prevkey = thiskey;
	  /* Get info on 1st field of this line */
	  thiskey.text = thisline->buffer;
	  thiskey.key.text = find_field (keyfields, thisline->buffer, &thiskey.keylen);
	  if (keyfields->numeric)
	    thiskey.key.number = atol (thiskey.key.text);
	  /* Compare the two lines using precomputed info on 1st field */
	  comp = compare_prepared (&prevkey, &thiskey);
	}
      if (comp > 0)
	{
	  printf ("sort: disorder:%s\n", thisline->buffer);
	  failure = 1;
	  break;
	}

      /* Make this line become the previous line.
	 Make the old previous line's buffer be the buffer
	 for thenext line.  */
      exch = prevline;  prevline = thisline; thisline = exch;
    }

  fclose (istream);
  free (lb1.buffer);
  free (lb2.buffer);

  return failure;
}

/* Assume (and optionally verify) that each input file is sorted;
 merge them and output the result.
 Returns nonzero if any input file fails to be sorted.

 This is the high-level interface that can handle an unlimited number of files.  */

#define MAX_DIRECT_MERGE 10

int
merge_files (infiles, nfiles, outfile)
     char **infiles;
     int nfiles;
     char *outfile;
{
  register char **tempfiles;
  register int ntemps;
  register int i;
  int value = 0;
  int start_tempcount = tempcount;

  if (nfiles <= MAX_DIRECT_MERGE)
    return merge_direct (infiles, nfiles, outfile);

  /* Merge groups of MAX_DIRECT_MERGE input files at a time,
     making a temporary file to hold each group's result.  */

  ntemps = (nfiles + MAX_DIRECT_MERGE - 1) / MAX_DIRECT_MERGE;
  tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
  for (i = 0; i < ntemps; i++)
    {
      register int nf = MAX_DIRECT_MERGE;
      if (i + 1 == ntemps)
	nf = nfiles - i * MAX_DIRECT_MERGE;
      tempfiles[i] = maketempname (++tempcount);
      value |= merge_direct (&infiles[i * MAX_DIRECT_MERGE], nf, tempfiles[i]);
    }

  /* All temporary files that existed before are no longer needed
     since their contents have been merged into our new tempfiles.
     So delete them.  */
  flush_tempfiles (start_tempcount);

  /* Now merge the temporary files we created.  */

  merge_files (tempfiles, ntemps, outfile);

  free (tempfiles);

  return value;
}  

/* Assume (and optionally verify) that each input file is sorted;
 merge them and output the result.
 Returns nonzero if any input file fails to be sorted.

 This version of merging will not work if the number of
 input files gets too high.  Higher level functions
 use it only with a bounded number of input files.  */

int
merge_direct (infiles, nfiles, outfile)
     char **infiles;
     int nfiles;
     char *outfile;
{
  char **ip = infiles;
  struct linebuffer *lb1, *lb2;
  register struct linebuffer **thisline;
  struct linebuffer **prevline;
  FILE **streams;
  int i;
  int nleft;
  int lossage = 0;
  int *file_lossage;
  struct linebuffer *prev_out = 0;
  FILE *ostream = stdout;

  if (outfile)
    ostream = fopen (outfile, "w");
  if (!ostream) pfatal_with_name (outfile);

  if (nfiles == 0)
    {
      if (outfile)
        fclose (ostream);
      return 0;
    }

  /* For each file, make two line buffers.
     Also, for each file, there is an element of `thisline'
     which points at any time to one of the file's two buffers,
     and an element of `prevline' which points to the other buffer.
     `thisline' is supposed to point to the next available line from the file,
     while `prevline' holds the last file line used,
     which is remembered so that we can verify that the file is properly sorted. */

  /* lb1 and lb2 contain one buffer each per file */
  lb1 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
  lb2 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));

  /* thisline[i] points to the linebuffer holding the next available line in file i,
     or is zero if there are no lines left in that file.  */
  thisline = (struct linebuffer **) xmalloc (nfiles * sizeof (struct linebuffer *));
  /* prevline[i] points to the linebuffer holding the last used line from file i.
     This is just for verifying that file i is properly sorted.  */
  prevline = (struct linebuffer **) xmalloc (nfiles * sizeof (struct linebuffer *));
  /* streams[i] holds the input stream for file i.  */
  streams = (FILE **) xmalloc (nfiles * sizeof (FILE *));
  /* file_lossage[i] is nonzero if we already know file i is not properly sorted.  */
  file_lossage = (int *) xmalloc (nfiles * sizeof (int));

  /* Allocate and initialize all that storage */

  for (i = 0; i < nfiles; i++)
    {
      initbuffer (&lb1[i]);
      initbuffer (&lb2[i]);
      thisline[i] = &lb1[i];
      prevline[i] = &lb2[i];
      file_lossage[i] = 0;
      streams[i] = fopen (infiles[i], "r");
      if (!streams[i])
	pfatal_with_name (infiles[i]);

      readline (thisline[i], streams[i]);
    }

  /* Keep count of number of files not at eof */
  nleft = nfiles;

  while (nleft)
    {
      register struct linebuffer *best = 0;
      register struct linebuffer *exch;
      register int bestfile = -1;
      register int i;

      /* Look at the next avail line of each file; choose the least one.  */

      for (i = 0; i < nfiles; i++)
	{
	  if (thisline[i] &&
	      (!best ||
	       0 < compare_general (best->buffer, thisline[i]->buffer,
				    (long) bestfile, (long) i, num_keyfields)))
	    {
	      best = thisline[i];
	      bestfile = i;
	    }
	}

      /* Output that line, unless it matches the previous one and we don't want duplicates */

      if (!(prev_out && delete_duplicates &&
	    !compare_general (prev_out->buffer, best->buffer, 0L, 1L, num_keyfields - 1)))
	{
	  fputs (best->buffer, ostream);
	  putc ('\n', ostream);
	}
      prev_out = best;

      /* Now make the line the previous of its file, and fetch a new line from that file */

      exch = prevline[bestfile];
      prevline[bestfile] = thisline[bestfile];
      thisline[bestfile] = exch;

      while (1)
	{
	  /* If the file has no more, mark it empty */

	  if (feof (streams[bestfile]))
	    {
	      thisline[bestfile] = 0;
	      nleft--;		/* Update the number of files still not empty */
	      break;
	    }
	  readline (thisline[bestfile], streams[bestfile]);
	  if (thisline[bestfile]->buffer[0] || !feof (streams[bestfile])) break;
	}

      /* Check that the file's new line is, as it should be, greater than
	 the one just output */

      if (check && thisline[bestfile] &&
	  compare_general (prevline[bestfile]->buffer, thisline[bestfile]->buffer, 0L, 1L, num_keyfields) > 0 &&
	  !file_lossage[bestfile])
	{
	  fprintf (stderr, "lines of file %s are not properly sorted\n", infiles[bestfile]);
	  file_lossage[bestfile] = 1;
	  lossage = 1;
	}

    }

  /* Free all storage and close all input streams */

  for (i = 0; i < nfiles; i++)
    {
      fclose (streams[i]);
      free (lb1[i].buffer);
      free (lb2[i].buffer);
    }
  free (file_lossage);
  free (lb1);
  free (lb2);
  free (thisline);
  free (prevline);
  free (streams);

  if (outfile)
    fclose (ostream);

  return lossage;
}

/* Print error message and exit.  */

fatal (s1, s2)
     char *s1, *s2;
{
  error (s1, s2);
  exit (1);
}

/* Print error message.  `s1' is printf control string, `s2' is arg for it. */

error (s1, s2)
     char *s1, *s2;
{
  printf ("sort: ");
  printf (s1, s2);
  printf ("\n");
}

perror_with_name (name)
     char *name;
{
  extern int errno, sys_nerr;
  extern char *sys_errlist[];
  char *s;

  if (errno < sys_nerr)
    s = concat ("", sys_errlist[errno], " for %s");
  else
    s = "cannot open %s";
  error (s, name);
}

pfatal_with_name (name)
     char *name;
{
  extern int errno, sys_nerr;
  extern char *sys_errlist[];
  char *s;

  if (errno < sys_nerr)
    s = concat ("", sys_errlist[errno], " for %s");
  else
    s = "cannot open %s";
  fatal (s, name);
}

/* Return a newly-allocated string whose contents concatenate those of s1, s2, s3.  */

char *
concat (s1, s2, s3)
     char *s1, *s2, *s3;
{
  register int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
  register char *result = (char *) xmalloc (len1 + len2 + len3 + 1);

  strcpy (result, s1);
  strcpy (result + len1, s2);
  strcpy (result + len1 + len2, s3);
  *(result + len1 + len2 + len3) = 0;

  return result;
}

/* Like malloc but get fatal error if memory is exhausted.  */

int
xmalloc (size)
     int size;
{
  register int result = malloc (size);
  if (!result)
    fatal ("virtual memory exhausted", 0);
  return result;
}


int
xrealloc (ptr, size)
     char *ptr;
     int size;
{
  register int result = realloc (ptr, size);
  if (!result)
    fatal ("virtual memory exhausted");
  return result;
}

/* Parse an integer in decimal off string `*p' points to,
 and update *p to point at the terminating character.
 The integer is returned as the value.  */

int
parse_int (p)
     register char **p;
{
  register int val = atoi (*p);

  while (**p >= '0' && **p <= '9') (*p)++;

  return val;
}
